package com.doit.day04

import scala.io.Source

/**
 * Computes daily page views (PV) and unique visitors (UV) from a comma-separated access log.
 *
 * Each valid input line has exactly three non-empty fields, e.g.
 * `site2,user7,2018-03-08 11:12:22`. The first ten characters of the third field are used as
 * the date key. Lines that do not fit this shape are treated as dirty data and skipped.
 */
object _05_pvuv {

  /** Log file that is read when no path is given on the command line. */
  private val DefaultInputPath = "data/input/pvuv.txt"

  /** Number of leading characters of the third field that make up the date (`yyyy-MM-dd`). */
  private val DateLength = 10

  /**
   * Reads the log file and prints one `(date, pv, uv)` tuple per day, ordered by date.
   *
   * @param args optional; when present, `args(0)` is the path of the log file to read instead
   *             of the default `data/input/pvuv.txt`
   */
  def main(args: Array[String]): Unit = {
    val path = args.headOption.getOrElse(DefaultInputPath)

    // Materialise the lines before closing: getLines() is lazy and needs the open handle.
    val source = Source.fromFile(path)
    val lines: List[String] =
      try source.getLines().toList
      finally source.close()

    pvUvByDate(lines).foreach(println)
  }

  /**
   * Parses one log line into `(user, date)`.
   *
   * Returns `None` for dirty data: a field count other than three, an empty first or second
   * field, or a third field too short to contain a full date (which would otherwise make
   * `substring` throw).
   */
  private def parseVisit(line: String): Option[(String, String)] =
    line.split(",") match {
      case Array(site, user, ts) if site.nonEmpty && user.nonEmpty && ts.length >= DateLength =>
        Some((user, ts.substring(0, DateLength)))
      case _ =>
        None
    }

  /**
   * Aggregates raw log lines into `(date, pv, uv)` rows sorted by date, where `pv` is the number
   * of valid lines for that date and `uv` is the number of distinct users among them.
   */
  private def pvUvByDate(lines: List[String]): List[(String, Int, Int)] =
    lines
      .flatMap(line => parseVisit(line).toList) // filter out dirty data
      .groupBy(_._2)
      .map { case (date, visits) =>
        (date, visits.size, visits.map(_._1).distinct.size)
      }
      .toList
      .sortBy(_._1) // Map iteration order is unspecified; sort for reproducible output
}
